Data Read

Nbastats <- read.csv(("C://Users/Tripl/OneDrive/Documents/R/nba.games.stats.csv"), stringsAsFactors = FALSE)
library(prettydoc)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)  
library(animation)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gganimate)
library(wesanderson)

Create A Season Column and Win Column

Nbastats<- Nbastats %>%
  mutate(Date = lubridate::as_date(Date), 
         season = ifelse(Date > "2014-10-01" & Date < "2015-06-30", "2014-2015",
                      ifelse(Date > "2015-10-01" & Date < "2016-06-30", "2015-2016", 
                            ifelse(Date > "2016-10-01" & Date < "2017-06-30", "2016-2017", "2017-2018"))),
         Win01= ifelse (WINorLOSS=="W", 1, 0))

Here I created a season column so as to help later in my analysis in sifting through season data. Due to changeovers in roster or coaches, individual seasons should be established for data integrity to be maintained. I also created a win loss column in order to later establish how many of the 82 games in a given season each individual team actually won.

Create Effective Field Goal Percentage

Nbastats <- Nbastats %>%
  mutate(EFGperc = (FieldGoals + (.5*X3PointShots))/FieldGoalsAttempted)

Effective field goal percentage is a statistic that bears more relevance then the regular field goal percentage because it accounts for the fact that a made three is worth more than a two point field-goal, despite that regular field goal percentage just weighs them all the same as “shots.”

Summarize To Find Season Long Mean EFG% and Win-Loss Record

 TeamSeasonEFG <- Nbastats %>%
    mutate(EFGperc = (FieldGoals + (.5*X3PointShots))/FieldGoalsAttempted) %>%
    group_by(Team, season) %>%
    summarize(meanEFG = mean(EFGperc), WinCount = sum(Win01), THREEPOINTSHOTSPERGAME = mean(X3PointShotsAttempted), OppPtsPerGame = mean(OpponentPoints), PointsPerGame = mean(TeamPoints), SPG = mean(Steals), Opp3PtShotsAttempted = mean(Opp.3PointShotsAttempted), OppTPG = mean(Opp.Turnovers), TPG = mean(Turnovers), APG = mean(Assists))

Here, I created a summary table of the data sorting by team and season in order to have the teams Effective FG% for the season and their wins in each season. I also included a bunch of other mean statistics for future exploration.

Histogram of Win Count Over 4 Years

WinCountPlot <- ggplot(data=TeamSeasonEFG, aes(TeamSeasonEFG$WinCount)) + geom_bar(fill="black", color = "white") + theme_light()
WinCountPlot + scale_fill_gradient(low="blue", high="red")

Histogram of Win Count Identifying The Three Types of Teams

WinCountScaled <- ggplot()+ geom_bar(data = TeamSeasonEFG[TeamSeasonEFG$WinCount < 30,], mapping = aes(WinCount), fill = "red", color="black")

WinCountColored <- WinCountScaled + geom_bar(data = TeamSeasonEFG[TeamSeasonEFG$WinCount > 49,], mapping = aes(WinCount), fill = "gold", color="black")+ geom_bar(data = TeamSeasonEFG[TeamSeasonEFG$WinCount > 29 & TeamSeasonEFG$WinCount < 50 ,], mapping = aes(WinCount), fill = "black", color="red") + annotate("text", x =19 , y = 6, label = "Pray For Luka or Zion") + annotate("text", x =40 , y = 7, label = "NBA Purgatory") + annotate("text", x =64 , y = 6, label = "There Can Only Be One") + ggtitle("An Examination Of Win Count") 

WinCountColored

Win Count By Season Broken Out

WinCountColoredFacet <- WinCountScaled + geom_bar(data = TeamSeasonEFG[TeamSeasonEFG$WinCount > 49,], mapping = aes(WinCount), fill = "gold", color="black")+ geom_bar(data = TeamSeasonEFG[TeamSeasonEFG$WinCount > 29 & TeamSeasonEFG$WinCount < 50 ,], mapping = aes(WinCount), fill = "black", color = "red") + annotate("text", x =20 , y = 5, label = "Bottom Feeders") + annotate("text", x =40 , y = 7, label = "Purgatory") + annotate("text", x =64 , y = 5, label = "Title Threats") + facet_wrap(~season) + ggtitle("An Examination Of Win Count By Season") 

WinCountColoredFacet

Analysis: The above histograms show a count of win counts, colored by where the team lies in terms of practical ability to win the NBA championship. 50 is by no means a “magic number” but it is a pretty good indicator of the type of team that typically has demonstrated the ability to win the NBA championship. In breaking the historgram up by season, the insight gained is telling. As teams have begun to dive deeper and depper into the analytics of basketball, exploiting every single possible rule within the system has become more and more common. Thus, it is no surprise that 2017-2018 saw a significant increase in “bottom feeder” teams from 2014-2015. In addition to the analytics explosion, the relative sucess of the 76ers tanking experiment “The Process” could have factored into more ownership groups being willing to go all in on the full blown awful.

Correlation Matrix Work

library(ggplot2)
library(corrplot)
## corrplot 0.84 loaded
WinCorrWithNAREMOVAL<- cor(TeamSeasonEFG[, 3:9], use = "pairwise.complete.obs")

knitr::kable(WinCorrWithNAREMOVAL)
meanEFG WinCount THREEPOINTSHOTSPERGAME OppPtsPerGame PointsPerGame SPG Opp3PtShotsAttempted
meanEFG 1.0000000 0.6865368 0.5667305 0.1561670 0.8305779 0.2205922 0.3504467
WinCount 0.6865368 1.0000000 0.2644350 -0.4404047 0.5580927 0.2186950 -0.1177219
THREEPOINTSHOTSPERGAME 0.5667305 0.2644350 1.0000000 0.3615865 0.6089860 0.0660815 0.3997115
OppPtsPerGame 0.1561670 -0.4404047 0.3615865 1.0000000 0.4679852 -0.0181805 0.6563483
PointsPerGame 0.8305779 0.5580927 0.6089860 0.4679852 1.0000000 0.1854090 0.4583397
SPG 0.2205922 0.2186950 0.0660815 -0.0181805 0.1854090 1.0000000 0.1359623
Opp3PtShotsAttempted 0.3504467 -0.1177219 0.3997115 0.6563483 0.4583397 0.1359623 1.0000000
WinCountCorPlot <- corrplot::corrplot(WinCorrWithNAREMOVAL, order = "hclust")

WinCountCorPlot
##                        OppPtsPerGame Opp3PtShotsAttempted         SPG
## OppPtsPerGame             1.00000000            0.6563483 -0.01818052
## Opp3PtShotsAttempted      0.65634826            1.0000000  0.13596232
## SPG                      -0.01818052            0.1359623  1.00000000
## WinCount                 -0.44040468           -0.1177219  0.21869500
## THREEPOINTSHOTSPERGAME    0.36158654            0.3997115  0.06608148
## meanEFG                   0.15616700            0.3504467  0.22059216
## PointsPerGame             0.46798518            0.4583397  0.18540902
##                          WinCount THREEPOINTSHOTSPERGAME   meanEFG
## OppPtsPerGame          -0.4404047             0.36158654 0.1561670
## Opp3PtShotsAttempted   -0.1177219             0.39971154 0.3504467
## SPG                     0.2186950             0.06608148 0.2205922
## WinCount                1.0000000             0.26443500 0.6865368
## THREEPOINTSHOTSPERGAME  0.2644350             1.00000000 0.5667305
## meanEFG                 0.6865368             0.56673055 1.0000000
## PointsPerGame           0.5580927             0.60898596 0.8305779
##                        PointsPerGame
## OppPtsPerGame              0.4679852
## Opp3PtShotsAttempted       0.4583397
## SPG                        0.1854090
## WinCount                   0.5580927
## THREEPOINTSHOTSPERGAME     0.6089860
## meanEFG                    0.8305779
## PointsPerGame              1.0000000

Analysis:

In unsurprising fashion, numerous worthwhile correlations were are to be found in this matrix. Specifically looking at mean EFG, it showes strong correlation to win count, points per game, and three point shots per game.

Hypothesis Before Linear Model:

If a team has more assists over the course of a season, than they will have a higher win count.

WLBoxplot <- ggplot(Nbastats, aes(WINorLOSS, Assists)) +
  geom_boxplot(fill = "blue", color = "red") +
  theme_minimal()

WLBoxplot

Linear Model of Opponent Points Per Game and Steals Per Game

Hypothesis

If a team gets more steals, then its opponent will score less.

Assignment3lmtest <- lm(OppPtsPerGame ~ SPG, data = TeamSeasonEFG)
summary(Assignment3lmtest)
## 
## Call:
## lm(formula = OppPtsPerGame ~ SPG, data = TeamSeasonEFG)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7027  -3.0317   0.2758   3.0891   9.7196 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 104.37587    3.68412  28.331   <2e-16 ***
## SPG          -0.09335    0.47261  -0.198    0.844    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.316 on 118 degrees of freedom
## Multiple R-squared:  0.0003305,  Adjusted R-squared:  -0.008141 
## F-statistic: 0.03902 on 1 and 118 DF,  p-value: 0.8438

Analysis: I fail to reject the null hypothesis. The p-value was not statistically significant in any way.

Opponent Points Per Game Vs. Steals

OppPtsVsSteals <- ggplot(TeamSeasonEFG, aes(SPG, OppPtsPerGame, col = season)) + geom_smooth(method = "lm", se = FALSE) + theme_classic()+ ggtitle("2014-2018 Analysis: Mean Points Allowed Per Game vs. Mean Steals Per Game") 

OppPtsVsSteals

Analysis: The above visual actually shows what I had already discussed in regard to rejecting the null hypothesis. Only 2017-2018 saw a steady decrease in opponents points per game as a team got more steals per game.

Granular Look At Opponents Points Per Game Vs. Steals

OppPtsVsStealsByseason <- ggplot(TeamSeasonEFG, aes(SPG, OppPtsPerGame, col = WinCount)) + geom_point(size=3)+ geom_smooth(method = "lm", se = FALSE, color = "red")+ theme_classic()+ facet_wrap(~season) + theme(axis.text.x  = element_text(angle=90, vjust=0.5, size=8)) + scale_color_gradient(low="green", high= "purple")+ ggtitle("Granular Season Over Season: Mean Points Allowed Per Game vs. Mean Steals Per Game") 

OppPtsVsStealsByseason

Analysis:

Broken out over season to include win count, there is no statistically significant relationship between these variables when they interact with each other.

Random Intercept Model

library(lme4)
## Loading required package: Matrix
randint <- lmer(OppPtsPerGame ~ SPG + (1|season), data = TeamSeasonEFG)

summary(randint)
## Linear mixed model fit by REML ['lmerMod']
## Formula: OppPtsPerGame ~ SPG + (1 | season)
##    Data: TeamSeasonEFG
## 
## REML criterion at convergence: 651.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.76828 -0.65967  0.02114  0.64232  2.21301 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  season   (Intercept)  7.96    2.821   
##  Residual             12.56    3.544   
## Number of obs: 120, groups:  season, 4
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) 103.77582    3.34407  31.033
## SPG          -0.01593    0.38896  -0.041
## 
## Correlation of Fixed Effects:
##     (Intr)
## SPG -0.901
ranef(randint)
## $season
##           (Intercept)
## 2014-2015  -3.4565071
## 2015-2016  -0.9303196
## 2016-2017   1.8407279
## 2017-2018   2.5460987
## 
## with conditional variances for "season"
library(merTools)
## Loading required package: arm
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is C:/Users/Tripl/OneDrive/Documents/R/Advanced Statistical Inference With Seth
## 
## Attaching package: 'arm'
## The following object is masked from 'package:corrplot':
## 
##     corrplot
plotREsim(REsim(randint), labs = TRUE)

confint(randint)
## Computing profile confidence intervals ...
##                  2.5 %      97.5 %
## .sig01       1.2713629   6.1132495
## .sigma       3.1193212   4.0367741
## (Intercept) 97.3285616 110.2321421
## SPG         -0.7828262   0.7479671

Analysis:

I found this breakdown to be fascinating. In the linear model visualization, you can clearly see the evolution of basketball. In 2014-2015, opponents points per game went up as a team got more steals. This is incredibly counter-intuitive and causes some inferential questions.

30 Team Individual Breakdown Over Four Years

 ThirtyTeamSplit <- ggplot(TeamSeasonEFG, aes(season, meanEFG, group=Team)) + geom_line()+
  geom_point(aes(color=WinCount)) +
   scale_color_gradient(low="green", high= "blue")+
   facet_wrap(~Team) +
  theme(axis.text.x  = element_text(angle=90, vjust=0.5, size=8)) + 
  ggtitle("Mean EFG vs. Win Total Over Time") 
 
 ThirtyTeamSplit

The above visual is a facet wrap of all 30 NBA teams over the course of 4 seasons. It shows the rise or fall in mean effective field goal % and is colored to highlight win count. Most notable here would be the rise of the Philadelphia 76ers. This rise in both variables correlates nicely with “The Process” and the pivot from tanking team to on the rise playoff contender. It also highlights the unparalelled excellence of the Golden State Warriors.

Moving Visual Of Mean EFG and Win Count from 2014/2015 - 2017-2018

MovingWinTotalEFGNBAVIZ <- TeamSeasonEFG %>%
  plot_ly(
    x = ~meanEFG, 
    y = ~WinCount, 
    color = ~Team, 
    frame = ~season, 
    text = ~Team, 
    hoverinfo = "text",
    type = 'scatter',
    mode = 'markers'
  ) %>%
  layout(
    xaxis = list(
      type = "log"
    )
  )

MovingWinTotalEFGNBAVIZ
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

Similarly, this is a moving interactive graphic that lets you evaluate the team performance in EFG% and Win Total over the four seasons.

Mean EFG vs. 3 Point Shots Attempted (Sized For Win Count)

SizingWithThrees<- TeamSeasonEFG %>%
  plot_ly(
    x = ~meanEFG, 
    y = ~THREEPOINTSHOTSPERGAME,
    size = ~WinCount,
    color = ~Team, 
    frame = ~season, 
    text = ~Team, 
    hoverinfo = "text",
    type = 'scatter',
    mode = 'markers'
  ) %>%
  layout(
    xaxis = list(
      type = "log"
    )
  )

SizingWithThrees
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

Analysis:

Here is a comparison of Mean EFG to 3 Point Shots Attempted, sizing the dots by win count in each season. The visual very clearly highlights the change in NBA play style. As teams began to embrace analytics, more and more teams began to see the value in shooting many more threes per game. As that went up, so too did meanEFG (a very good indicator of offensive efficiency being on the rise).

A2TO <- lm(TPG ~ APG, data = TeamSeasonEFG)
summary(A2TO)
## 
## Call:
## lm(formula = TPG ~ APG, data = TeamSeasonEFG)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7381 -0.7603 -0.1902  0.5924  3.5112 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.88054    1.15445   9.425 4.61e-16 ***
## APG          0.12233    0.05099   2.399    0.018 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161 on 118 degrees of freedom
## Multiple R-squared:  0.04651,    Adjusted R-squared:  0.03843 
## F-statistic: 5.756 on 1 and 118 DF,  p-value: 0.018

FACET WRAP APG vs. TPG

A2TO30TM <- ggplot(TeamSeasonEFG, aes(season, APG, group=Team)) + geom_line()+
  geom_point(aes(color=TPG, size = WinCount)) +
   scale_color_gradient(low="red", high= "blue")+
   facet_wrap(~Team) +
  theme(axis.text.x  = element_text(angle=90, vjust=0.5, size=7)) + 
  ggtitle("APG vs. TPG") 
 
 A2TO30TM

AssistsVsTO <- TeamSeasonEFG %>%
  plot_ly(
    x = ~APG, 
    y = ~TPG,
    size = ~WinCount,
    color = ~Team, 
    frame = ~season, 
    text = ~Team, 
    hoverinfo = "size",
    type = 'scatter',
    mode = 'markers'
  ) %>%
  layout(
    xaxis = list(
      type = "lm"
    )
  )

AssistsVsTO
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors